#### Figure A7, A8, and A9 Script ####
rm(list=ls());gc();gc();gc();gc();gc();gc();gc();gc()

library(gt)
library(Hmisc)
library(tidyverse)
library(dplyr)
library(hrbrthemes)
library(lfe)
library(stargazer)

here::i_am("Scripts/FigA7A8A9Script.R")

library(here)

## Figure A7 ####
#First let's get our baseline:
df<-readRDS(here("Data", "acrossanalysisset.rds"))


#Limit to just those non-event donors who gave more than once

#Get a designation for event and non-event donors
df<-df %>%
  group_by(finstate3dc)%>%
  dplyr::mutate(edon=ifelse(sum(fundraiser)>0,1,0))

#Find out how many donations each person has

df<-df %>%
  group_by(finstate3dc)%>%
  dplyr::mutate(numdons=n())

tabnumdonsedon<-table(df$edon, df$numdons)

#Also get a total for how much total they've donated

df<-df %>%
  group_by(finstate3dc)%>%
  dplyr::mutate(totdonamt=sum(cAMOUNT))


#Breakdown of event donors/ non-event donors

df%>%select(finstate3dc, edon)%>%
  unique()%>%
  group_by(edon)%>%
  dplyr::summarize(counttot=n(), perc=counttot/length(unique(df$finstate3dc)))



quantile(df$totdonamt, probs = seq(.1, .9, by = .1))

df$decile<-ntile(df$totdonamt, 10)


#This is our new sample - now carry out the same analysis as before

df %>%
  select(finstate3dc, edon,cSTATE)%>%
  unique()%>%
  group_by(edon,cSTATE)%>%
  dplyr::summarize(count=n())

#edon cSTATE count
#<dbl> <chr>  <int>
#1     0 KY     10012
#2     0 MI     29088
#3     0 OH     43072
#4     1 KY     21641
#5     1 MI     63230
#6     1 OH     82751


#Get the donor ids and their wsd

kypart<-df[df$cSTATE=="KY",]
kypartdons<-kypart%>%
  group_by(finstate3dc)%>%
  dplyr::mutate(edon=ifelse(sum(fundraiser)>0,1,0))%>%
  dplyr::mutate(wsd=sqrt(wtd.var(recipient.cfscore,weights=rcAMOUNT)))%>%
  ungroup%>%
  select(finstate3dc, wsd,edon,numdons, decile, totdonamt)%>%
  unique()
#saveRDS(kypartdons, "kypartdonsrobust.rds") 

mipart<-df[df$cSTATE=="MI",]
mipartdons<-mipart%>%
  group_by(finstate3dc)%>%
  dplyr::mutate(edon=ifelse(sum(fundraiser)>0,1,0))%>%
  dplyr::mutate(wsd=sqrt(wtd.var(recipient.cfscore,weights=rcAMOUNT)))%>%
  ungroup%>%
  select(finstate3dc, wsd,edon,numdons, decile, totdonamt)%>%
  unique()
#saveRDS(mipartdons, "mipartdonsrobust.rds") 

ohpart<-df[df$cSTATE=="OH",]
ohpartdons<-ohpart%>%
  group_by(finstate3dc)%>%
  dplyr::mutate(edon=ifelse(sum(fundraiser)>0,1,0))%>%
  dplyr::mutate(wsd=sqrt(wtd.var(recipient.cfscore,weights=rcAMOUNT)))%>%
  ungroup%>%
  select(finstate3dc, wsd,edon,numdons, decile, totdonamt)%>%
  unique()
#saveRDS(ohpartdons, "ohpartdonsrobust.rds") 


allpartdons<-bind_rows(kypartdons, mipartdons,ohpartdons)
#saveRDS(allpartdons, "allpartdonsrobust.rds")

#set seed and sample
set.seed(82)
numsims<-1200
kysamps<-as.data.frame(replicate(numsims,sample(kypartdons$edon,length(kypartdons$edon),replace = FALSE)))
#saveRDS(kysamps, "kysampsacrossrobust.rds")
misamps<-as.data.frame(replicate(numsims,sample(mipartdons$edon,length(mipartdons$edon),replace = FALSE)))
#saveRDS(misamps, "misampsacrossrobust.rds")
ohsamps<-as.data.frame(replicate(numsims,sample(ohpartdons$edon,length(ohpartdons$edon),replace = FALSE)))
#saveRDS(ohsamps, "ohsampsacrossrobust.rds")

#kysamps<-readRDS("kysampsacrossrobust.rds")
#misamps<-readRDS("misampsacrossrobust.rds")
#ohsamps<-readRDS("ohsampsacrossrobust.rds")

allsamps<-rbind(kysamps, misamps, ohsamps)
#saveRDS(allsamps, "allsampsacrossrobust.rds")


#Now attach to the important parts of the main dataset
#allsamps<-readRDS("allsampsacrossrobust.rds")
#allpartdons<-readRDS("allpartdonsrobust.rds")

dfan<-cbind(allpartdons, allsamps)

#saveRDS(dfan, "sampanalysisacrossrobust.rds")

#in each of these simulations, we have new "event donors" and "non-event donors"
#dfan<-readRDS("sampanalysisacrossrobust.rds")



#Mann-Whitney needs equal number of observations:
#split sample
edon1s<-dfan[dfan$edon==1,]
edon0s<-dfan[dfan$edon==0,]

#get 30K of each of these
edons50k1s<-sample_n(edon1s, 30000)
edons50k0s<-sample_n(edon0s, 30000)

wilcox<-wilcox.test(edons50k1s$wsd, edons50k0s$wsd, paired = TRUE, alternative = "greater")

#Let's try to do this for each of the simulations

#Going to want an empty place for results that is 1200 rows, 3 columns (simnum, wmeventdonor, wmnoneventdonors)
set.seed(82)

#Change this to 1200 simulations to match the within donor analysis
numsims<-1200

medresults <- vector(mode='list', length=numsims)
wilcresults<-vector(length=numsims)
wilcstatisticresults<-vector(length=numsims)

#Let's limit to donors who have between 3 and 30 donations

dfan<-dfan%>%
  filter(numdons>2 & numdons <31)

#Let's focus on how to do this once with our actual data, before looping through:
medians<-dfan%>%
  group_by(edon)%>%
  dplyr::summarize(medians=median(wsd))

sampdist<-medians[2,2]-medians[1,2]

sampdist

dfan<-dfan[,c(1:3, 7:1206)]



for (i in 1:numsims){
  print(i)
  dfan1<-dfan[,c(1,2, 3+i)]
  names(dfan1)[3]<-"edon"
  
  
  #Mann-Whitney needs equal number of observations:
  #split sample
  edon1s<-dfan1[dfan1$edon==1,]
  edon0s<-dfan1[dfan1$edon==0,]
  
  #get 50K of each of these
  edons50k1s<-sample_n(edon1s, 30000)
  edons50k0s<-sample_n(edon0s, 30000)
  
  wilcresults[i]<-wilcox.test(edons50k1s$wsd, edons50k0s$wsd, paired = TRUE, alternative = "greater")$p.value
  wilcstatisticresults[i]<-wilcox.test(edons50k1s$wsd, edons50k0s$wsd, paired = TRUE, alternative = "greater")$statistic
  
  medresults[[i]]<-dfan1 %>%
    group_by(edon)%>%
    dplyr::summarize(medians=median(wsd))
  
  
  
  
}
#saveRDS(medresults, "acrosssampsimresults1200robust.rds")
#saveRDS(wilcresults, "acrosssampsimwilcoxresults1200robust.rds")
#saveRDS(wilcstatisticresults, "acrosssampsimwilcoxSTATresults1200robust.rds")

#wilcstatisticresults<-readRDS("acrosssampsimwilcoxSTATresults1200robust.rds")



#Paper quality graphs - I want the distribution of distances first, with our distance 
# put in the legend maybe?

#medresults<-readRDS("acrosssampsimresults1200robust.rds")

distances<-unlist(lapply(medresults,function(x){
  x[2,2]-x[1,2]
}))

df<-data.frame(label="(SD of Event Donors - SD of Non-Event Donors) in Sample: .086",
               x = Inf, y = Inf, hjust = 0, vjust = 1)

breaks <- c(-.005,-.004,-.003,-.002,-.001,0,.001,.002,.003,.1)

p<-ggplot()+aes(distances) + 
  geom_histogram(aes(y=after_stat(density)),      # Histogram with density instead of count on y-axis
                 bins=300,
                 colour="black", fill="white") +
  geom_density(alpha=.2, fill="lightgrey")+
  labs(x="Difference between Median SD of Event Donors \n and Median SD of Non-Event Donors", y="Frequency")+
  theme_ipsum() +
  #geom_textbox(aes(label = 'Median SD of Event Donors - Median SD of Non-Event Donors in Sample: .086', x = Inf, y = Inf),
  #          hjust = 1, vjust = 1)+
  scale_x_continuous(breaks = round(seq(min(distances), .1, by = 0.02),2)) +
  theme(
    axis.text.x=element_text(size=18),
    axis.text.y=element_text(size=18),
    axis.title.y = element_text(size=20),
    axis.title.x = element_text(size=20),
    legend.text=element_text(size=20),
    legend.title=element_text(size=20),
    strip.text.x = element_text(size = 20))+
  geom_vline(xintercept = .086, linetype="longdash", linewidth = 3) + 
  geom_text(mapping = aes(x = .068, y = 300, label = "In-sample Value \n .086"),
            inherit.aes = FALSE,size=6)+
  geom_segment(aes(x = .075, y = 275, xend = .083, yend = 275),
               arrow = arrow(length = unit(0.5, "cm")))

p



ggsave(here("Results", "FigA7.png"), width = 12, height = 5)


## Figure A8 ####



##### Panel 1 ######

#allpartdons<-readRDS("allpartdonsrobust.rds")


medians<-allpartdons%>%
  group_by(edon, numdons)%>%
  dplyr::summarize(medians=median(wsd))%>%
  filter(numdons<31)


diffs<-medians%>%
  group_by(numdons)%>%
  mutate(wsddiff=medians[edon==1]-medians[edon==0])

#Cut this in half and then graph distribution
newdiffs<-diffs[1:29,]


ggplot(newdiffs, aes(x=numdons, y=wsddiff)) + 
  geom_bar(stat = "identity")

#We also want to overlay distribution of either number of donors 
#or total money each group involves

#Let's do number of donors

#Get sums of donors for all types (and sums of money)
dfdonnums<-allpartdons%>%
  group_by(numdons)%>%
  dplyr::summarize(sumdonors=n(), summon=sum(totdonamt))%>%
  filter(numdons<31)

#Match this to the diffs above

newdf<-merge(newdiffs, dfdonnums, by="numdons")

newdf$percdonors<-newdf$sumdonors/sum(newdf$sumdonors)

newdf$percmoney<-newdf$summon/sum(newdf$summon)

#New graph with overlay

coeff <- .5

p<-ggplot(newdf, aes(x=numdons)) +
  geom_bar(aes(y=percdonors),stat = "identity",alpha=.2, colour="black")+
  geom_bar(aes(y=wsddiff/coeff),stat = "identity",alpha=.5 )+
  theme_ipsum()+
  scale_y_continuous(
    
    # Features of the first axis
    name = "Percent of Sample Donors in Group",
    
    # Add a second axis and specify its features
    sec.axis = sec_axis(~.*coeff, name="Difference in Median SDs")
  )+
  theme(
    axis.text.x=element_text(size=18),
    axis.text.y=element_text(size=18),
    axis.title.y = element_text(size=18),
    axis.title.x = element_text(size=18),
    axis.text.y.right = element_text(size=18),
    axis.title.y.right = element_text(size=18),
    legend.text=element_text(size=18),
    legend.title=element_text(size=18),
    strip.text.x = element_text(size = 18))+
  labs(x="Number of Donations Given")

p



ggsave(here("Results","FigA8Panel1.png"), width = 12, height = 5)



##### Panel 2 ######

coeff <- .75

p<- ggplot(newdf, aes(x=numdons)) +
  geom_bar(aes(y=percmoney),stat = "identity",alpha=.2, colour="black")+
  geom_bar(aes(y=wsddiff/coeff),stat = "identity",alpha=.5)+
  theme_ipsum()+
  scale_y_continuous(
    
    # Features of the first axis
    name = "Percent of Total Amount \n Given in Sample",
    
    # Add a second axis and specify its features
    sec.axis = sec_axis(~.*coeff, name="Difference in Median SDs")
  )+
  theme(
    axis.text.x=element_text(size=18),
    axis.text.y=element_text(size=18),
    axis.title.y = element_text(size=18),
    axis.title.x = element_text(size=18),
    axis.text.y.right = element_text(size=18),
    axis.title.y.right = element_text(size=18),
    legend.text=element_text(size=18),
    legend.title=element_text(size=18),
    strip.text.x = element_text(size = 18))+
  labs(x="Number of Donations Given")
p


ggsave(here("Results", "FigA8Panel2.png"), width = 12, height = 5)


## Figure A9 ####

medians<-allpartdons%>%
  group_by(edon, decile)%>%
  dplyr::summarize(medians=median(wsd))


diffs<-medians%>%
  group_by(decile)%>%
  mutate(wsddiff=medians[edon==1]-medians[edon==0])

#Cut this in half and then graph distribution
newdiffs<-diffs[1:10,]


ggplot(newdiffs, aes(x=decile, y=wsddiff)) + 
  geom_bar(stat = "identity")

#We also want to overlay distribution of total money each group involves

#Let's do number of donors

#Get sums of donors for all types (and sums of money)
dfdonnums<-allpartdons%>%
  group_by(decile)%>%
  dplyr::summarize(summon=sum(totdonamt))

#Match this to the diffs above

newdf<-merge(newdiffs, dfdonnums, by="decile")


newdf$percmoney<-newdf$summon/sum(newdf$summon)

#New graph with overlay

coeff <- 2

p<- ggplot(newdf, aes(x=decile)) +
  geom_bar(aes(y=percmoney/coeff),stat = "identity", alpha=.7, colour="black")+
  geom_bar(aes(y=wsddiff),stat = "identity",alpha=.4)+
  theme_ipsum()+
  scale_y_continuous(
    
    # Features of the first axis
    name = "Difference in Median SDs",
    
    # Add a second axis and specify its features
    sec.axis = sec_axis(~.*coeff, name="Percent of Total Amount \n Given in Sample")
  )+
  scale_x_continuous(breaks = round(seq(1,10, by = 1),2)) +
  theme(
    axis.text.x=element_text(size=18),
    axis.text.y=element_text(size=18),
    axis.title.y = element_text(size=18),
    axis.title.x = element_text(size=18),
    axis.text.y.right = element_text(size=18),
    axis.title.y.right = element_text(size=18),
    legend.text=element_text(size=18),
    legend.title=element_text(size=18),
    strip.text.x = element_text(size = 18))+
  labs(x="Decile of Total Giving")
p


ggsave(here("Results", "FigA9.png"), width = 12, height = 5)

